require(tidyverse)

# Start from a clean workspace: remove every object currently defined in the
# environment this script runs in. rm() only removes objects, so packages
# attached above are not detached by this call.
rm(list = ls())
# Run a garbage collection after clearing the workspace.
gc()

################################################################################
##
## Date:    2024-12-20
## Author:  james.h.bisbee@vanderbilt.edu
## Purpose: This script generates Figure 2.
## Inputs:  /scratch/jhb362/zilinsky_2023/data/results/VIMP_ranger/VIMP_2024.*-months.*FALSE_temp-chg12.RData
##            - Variable importance results generated by NFR_vimp_prep.R
##            - Summarized on the NYU HPC into PSRM_simple_months_chg12.RData via NFR_data_prep.R
##            - This script loads that summary from ./data/VIMP_ranger/PSRM_simple_months_chg12.RData
## Outputs: ./figures/fig2.pdf
##
################################################################################

# Compute details ----
# Print a short description of the machine and R session used to produce the
# figure. This output is purely diagnostic; nothing later in the script reads
# the objects created here.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))

# Operating system name as reported by R (e.g. "Windows", "Linux", "Darwin").
os_name <- Sys.info()[['sysname']]

if (os_name == 'Windows') {
  # Query hardware details through the Windows `wmic` command-line tool.
  # The first line of each result is the column header, hence [-1] / [2].
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == 'Linux') {
  # NOTE: this branch previously tested for 'Linuxs', which Sys.info() never
  # returns, so it could not run on Linux machines.
  #
  # List CPU share, memory share, PID and command line of every process whose
  # command name is `rsession`.
  ps_lines <- system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE)

  # ps pads its columns with a variable number of spaces, so trim each line
  # and split on runs of whitespace; the fields are then cpu, mem, pid and
  # the remaining tokens form the command line.
  splitted <- strsplit(trimws(ps_lines), "\\s+")

  # Drop the header line and stack one row per process. When no `rsession`
  # process is found there are no rows to bind and `df` is NULL.
  df <- do.call(rbind, lapply(splitted[-1],
                              function(x) data.frame(
                                cpu = as.numeric(x[1]),
                                mem = as.numeric(x[2]),
                                pid = as.numeric(x[3]),
                                cmd = paste(x[-(1:3)], collapse = " "))))
  # Print explicitly so the table is also shown when the script is source()d.
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

# R version, platform and attached package versions. Printed explicitly so the
# information also appears when the script is run through source().
print(sessionInfo())

# Restore the objects saved in the summary file into the workspace. The code
# below uses an object called `vimp` with an `outcome` column, which is
# expected to come from this file.
load('./data/VIMP_ranger/PSRM_simple_months_chg12.RData')

# Survey question wording used as a readable label for each outcome.
# The labels are attached by position to the rows of `lookup` below, so their
# order has to match the row order returned by count(outcome).
# NOTE(review): the outcome-to-label pairing cannot be checked from this
# script alone - confirm against the outcome codes in `vimp`.
outcome_labels <- c(
  "You are proud of your community or the area where you live.",
  "At this time, are you cutting back on how\nmuch money you spend each week, or not?",
  "How would you rate economic\nconditions in this country today?",
  "[BINARY] How would you rate economic\nconditions in this country today?",
  "Have there been times in the past twelve months when you did not\nhave enough money to buy food that you or your family needed?",
  "Are you feeling better about your\nfinancial situation these days, or not?",
  "Are you feeling pretty good these days about the\namount of money you have to spend, or not?",
  "Did you experience the following feelings\nduring a lot of the day yesterday? Happiness",
  "Do you have a favorable or unfavorable opinion of\nHillary Clinton, or haven't you heard of them?",
  "On which step of the ladder would you say\nyou will stand about five years from now?",
  "On which step of the ladder would you say\nyou personally feel you stand at this time?",
  "Would you be able right now to make a major purchase, such as a car,\nappliance, or furniture, or pay for a significant home repair if you needed to?",
  "[AGREE/DIS] You have more than enough\nmoney to do what you want to do.",
  "In the last seven days, you have worried about money.",
  "Did you worry yesterday that you\nspent too much money, or not?",
  "Right now, do you think that economic conditions in this\ncountry, as a whole, are getting better or getting worse?",
  "[BINARY] Right now, do you think that economic conditions in this\ncountry, as a whole, are getting better or getting worse?",
  "Do you approve or disapprove of the way\nBarack Obama is handling his job as president?",
  "Did you experience the following feelings\nduring a lot of the day yesterday? Sadness",
  "Are you satisfied or dissatisfied with your standard\nof living, all the things you can buy and do?",
  "Compared to the people you spend time with,\nyou are satisfied with your standard of living.",
  "Did you experience the following feelings\nduring a lot of the day yesterday? Stress",
  "Do you approve or disapprove of the way\nDonald Trump is handling his job as president?",
  "Do you have a favorable or unfavorable opinion of\nVA governor, or haven't you heard of them?",
  "[AGREE/DIS] You are watching your\nspending very closely.",
  "Did you experience the following feelings\nduring a lot of the day yesterday? Worry"
)

# One row per outcome (with its row count `n`), plus the readable label.
lookup <- count(vimp, outcome)
lookup$labs <- outcome_labels

# Summarise the variable-importance results per predictor (`vars`) and
# outcome: for every remaining column compute the mean and the 1st / 99th
# percentiles, producing columns named <column>_mean, <column>_lb and
# <column>_ub. `bsInd` is dropped first so it is not summarised.
# NOTE(review): `bsInd` presumably indexes the resampling draws the
# percentiles are taken over - confirm against NFR_data_prep.R.
toplot <- vimp %>%
  select(-bsInd) %>%
  group_by(vars, outcome) %>%
  summarise(
    across(
      everything(),
      list(
        # TRUE rather than T: T is an ordinary variable that load() or other
        # code could overwrite.
        mean = ~ mean(.x, na.rm = TRUE),
        lb = ~ quantile(.x, 0.01, na.rm = TRUE),
        ub = ~ quantile(.x, 0.99, na.rm = TRUE)
      )
    ),
    .groups = 'drop'
  ) %>%
  # Attach the outcome row counts and question labels. The key is spelled out
  # so the join cannot silently pick up additional shared columns.
  left_join(lookup, by = 'outcome')


# Turn a raw predictor name into the label shown on the figure's y axis.
# The substitutions are applied strictly in the order listed; later steps
# operate on the output of earlier ones (in particular, everything after
# str_to_title() sees title-cased text).
tidy_var_label <- function(raw) {
  out <- gsub('_imputed|_Total|_tot_', '', raw)   # drop bookkeeping suffixes
  out <- gsub('(^[A-Z]+)_', '\\1: ', out)         # "ABC_rest" -> "ABC: rest"
  out <- gsub('_', ' ', out)                      # remaining underscores -> spaces
  out <- gsub('aww', 'Wkly Wages', out)
  out <- gsub('(^[A-Z]+$)', 'INDIV: \\1', out)    # all-caps names get an INDIV prefix
  out <- gsub('pct', '%', out)
  out <- gsub('ECONWkly', 'ECON: Avg', out)
  out <- str_to_title(out)
  out <- gsub('Ur', 'Unemp Rate', out)
  out <- gsub('Lfpr', 'LFPR', out)
  out <- gsub('Deathrate', 'Death Rate', out)
  out <- gsub('Workingage', 'Working Age', out)
  out <- gsub('Inc', 'Income', out)
  out <- gsub('Agecat', 'Age', out)
  gsub('Annualinc', 'Annual Income', out)
}

# Data for the figure: the two outcomes being compared, restricted to the
# predictors matched by the first pattern and not matched by the second,
# without the vimp_* summary columns, and with readable predictor labels.
toplotBar <- toplot %>%
  filter(
    outcome %in% c('ENOUGHMON','ECON'),
    grepl('[A-Z]+$|Total|_tot_|DEM_|_ur|_lfpr',vars),
    !grepl('lifexp|emplvl|oth',vars)
  ) %>%
  select(-matches('^vimp_')) %>%
  mutate(vars = tidy_var_label(vars))

# One row per predictor with the mean relative importance for each outcome in
# its own column (ECON, ENOUGHMON). Computed once and reused for the y-axis
# ordering and for the connector segments below.
# `diff` is kept for inspection; the figure itself does not use it.
ords <- toplotBar %>%
  select(vars, outcome, relVimp_mean) %>%
  pivot_wider(names_from = outcome, values_from = relVimp_mean) %>%
  mutate(diff = abs(ENOUGHMON - ECON))

# Predictors ordered by their mean importance for the ECON outcome; used as
# factor levels so the y axis follows that ordering.
var_order <- ords %>% arrange(ECON) %>% pull(vars)

# Build the plot before opening the PDF device, so an error while building it
# does not leave a device open.
fig2 <- toplotBar %>%
  mutate(vars = factor(vars, levels = var_order)) %>%
  ggplot(aes(x = relVimp_mean, y = vars, color = outcome)) +
  # Fully transparent points: drawn with alpha = 0, so they add no visible
  # marks.
  geom_point(alpha = 0) +
  # Thin connector between the two outcome means for each predictor:
  # orange where ENOUGHMON exceeds ECON, grey60 where ECON exceeds ENOUGHMON.
  # NOTE(review): orange is also the legend colour of ECON - confirm the
  # connector colours are assigned as intended.
  geom_segment(data = ords %>% filter(ENOUGHMON > ECON),
               aes(x = ENOUGHMON, y = vars, xend = ECON, yend = vars),
               inherit.aes = FALSE, color = 'orange') +
  geom_segment(data = ords %>% filter(ENOUGHMON < ECON),
               aes(x = ENOUGHMON, y = vars, xend = ECON, yend = vars),
               inherit.aes = FALSE, color = 'grey60') +
  # Thick bar spanning the 1st-99th percentile interval of each estimate.
  geom_segment(aes(x = relVimp_lb, y = vars, xend = relVimp_ub, yend = vars),
               linewidth = 5) +
  scale_color_manual(values = c('ECON' = 'orange','ENOUGHMON' = 'grey60'),
                     labels = c('ENOUGHMON' = 'Enough money to buy food\nthat you or your family needed?',
                                'ECON' = 'How would you rate economic\nconditions in this country today?')) +
  theme_bw() +
  scale_x_continuous(labels = scales::percent) +
  labs(x = '% Reduction in error',
       y = NULL,
       color = 'Outcome',
       title = 'Variable importance: comparing two outcomes',
       subtitle = 'Permutation tests using random forests') +
  guides(color = guide_legend(
    keywidth = 0.2,
    keyheight = 0.4,
    default.unit = "inch")
  )

# Make sure the output directory exists; pdf() fails if it does not.
dir.create('./figures', showWarnings = FALSE, recursive = TRUE)

pdf('./figures/fig2.pdf',width = 7,height = 5)
# Print explicitly: a ggplot object is only drawn when printed, and automatic
# printing does not happen when the script is run through source().
print(fig2)
dev.off()

# EOF